Effect of cohesin looping on genome architecture, from the perspective of the nuclear lamina.
In a previous documents, I prepared DamID objects for all bins and FPKM values for all genes. I will correlate these here.
Note that I tried many things in this document. It comes down to a very simple message: there is no enrichment for LAD genes upon CTCF / RAD21 perturbations.
Load (z-scale) DamID tracks and FPKM values. Correlate these in various ways.
Load the libraries and set the parameters.
# Load dependencies
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(GenomicRanges))
suppressPackageStartupMessages(library(rtracklayer))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(caTools))
suppressPackageStartupMessages(library(yaml))
suppressPackageStartupMessages(library(GGally))
# Prepare output
output.dir <- "ts220117_Correlating_Expression_and_NLinteractions"
dir.create(output.dir, showWarnings = FALSE)
# Load data
input.dir <- "ts220113_CTCF_enrichment_at_LAD_borders"
lads <- readRDS(file.path(input.dir, "LADs_pA.rds"))[[1]]
lads.border <- readRDS(file.path(input.dir, "LAD_borders_pA.rds"))[[1]]
input.dir <- "ts220113_effect_of_CTCF_depletion_on_LAD_borders"
bin.size <- readRDS(file.path(input.dir, "bin_size.rds"))
gr_damid <- readRDS(file.path(input.dir, "damid.rds"))
metadata <- readRDS(file.path(input.dir, "metadata_damid.rds"))
input.dir <- "ts220113_GeneExpression"
genes <- readRDS(file.path(input.dir, "genes.rds"))
genes.fpkm <- readRDS(file.path(input.dir, "genes_fpkm_mean.rds"))
genes.res <- readRDS(file.path(input.dir, "genes_results.rds"))
input.dir <- "ts220113_CTCF_sites_within_LADs"
genes_LAD_active <- readRDS(file.path(input.dir,
"genes_LAD_active.rds"))
genes_LAD_active_CTCFidx <- readRDS(file.path(input.dir,
"genes_LAD_active_CTCFidx.rds"))
genes_LAD_inactive <- readRDS(file.path(input.dir,
"genes_LAD_inactive.rds"))
genes_LAD_inactive_CTCFidx <- readRDS(file.path(input.dir,
"genes_LAD_inactive_CTCFidx.rds"))
input.dir <- "ts220117_Positioning_DifferentialGenes"
significant_tests <- readRDS(file.path(input.dir,
"significant_tests.rds"))
genes_expression_idx <- readRDS(file.path(input.dir,
"genes_expression_idx.rds"))
# Also process the nascent expression data
input.dir <- "ts220117_NascentExpression"
#genes <- readRDS(file.path(input.dir, "genes.rds"))
genes.fpkm.nascent <- readRDS(file.path(input.dir, "genes_fpkm_mean.rds"))
genes.res.nascent <- readRDS(file.path(input.dir, "genes_results.rds"))
# Also get the LAD differences
input.dir <- "ts220113_DamID_changes_versus_LAD_size_and_score"
gr_LAD_consensus <- readRDS(file.path(input.dir, "LADs_consensus.rds"))Prepare knitr.
library(knitr)
opts_chunk$set(fig.width = 10, fig.height = 4,
message = F, warning = F,
dev=c('png', 'pdf'), fig.path = file.path(output.dir, "figures/"))
pdf.options(useDingbats = FALSE)Functions.
GenesDamIDScores <- function(genes, gr, extend = 1e4) {
# Extend genes
genes.ovl <- genes
start(genes.ovl) <- start(genes.ovl) - extend
end(genes.ovl) <- end(genes.ovl) + extend
# Overlay genes with bins
ovl <- findOverlaps(genes.ovl, gr)
# Determine mean score per gene
tib <- as_tibble(mcols(gr))[subjectHits(ovl), ] %>%
add_column(idx = queryHits(ovl)) %>%
gather(key, value, -idx) %>%
group_by(idx, key) %>%
summarise(mean = mean(value, na.rm = T)) %>%
ungroup() %>%
mutate(key = factor(key, levels = names(mcols(gr)))) %>%
spread(key, mean)
# Prepare genes with scores
genes.damid <- genes
mcols(genes.damid) <- tib %>%
dplyr::select(-idx)
genes.damid
}First, calculate DamID score for every gene.
# Overlay genes with LADs and add distance to border
genes$LAD <- overlapsAny(genes, lads, ignore.strand = T)
dis <- distanceToNearest(genes, lads.border, ignore.strand = T)
genes$LAD_distance <- mcols(dis)$distance
# Score per gene
genes.damid <- GenesDamIDScores(genes, gr_damid)Compare changes in LaminB1 scores with changes in gene expression.
# Create various plots between a base sample and other samples
PlotExpressionVsDamID <- function(genes, res, expr, damid, base, samples,
min_expr = -1) {
# Combine into one tibble
tib <- tibble(gene_id = genes$gene_id,
LAD = genes$LAD,
LAD_distance = genes$LAD_distance,
expr_base = expr %>% pull(base),
damid_base = as_tibble(mcols(damid)) %>% pull(base)) %>%
mutate(LAD = LAD & (LAD_distance > 0)) %>%
bind_cols(expr %>%
select(samples) %>%
rename_all(function(x) paste0("expr_", samples))) %>%
bind_cols(as_tibble(mcols(damid)) %>%
select(samples) %>%
rename_all(function(x) paste0("damid_", samples))) %>%
mutate_at(vars(paste0("expr_", samples)),
function(x) log2(x+1) - log2(.$expr_base+1)) %>%
mutate_at(vars(paste0("damid_", samples)),
function(x) x - .$damid_base)
# Get differential results
sign <- lapply(samples,
function(x) { mcols(res)[, paste(x, "vs", base, "sign",
sep = "_")]})
# Gather
idx <- rowSums(expr > min_expr) > 0
sign <- factor(unlist(sign), levels = c("down", "stable", "up"))[idx]
tib_gather <- tib[idx, ] %>%
dplyr::select(-contains("base")) %>%
gather(key_expr, expr, contains("expr")) %>%
gather(key_damid, damid, contains("damid")) %>%
mutate(key_expr = str_remove(key_expr, "expr_"),
key_damid = str_remove(key_damid, "damid_")) %>%
filter(key_damid == key_expr) %>%
mutate(key_damid = factor(key_damid, levels = samples),
sign = sign)
# Plot
plt <- tib_gather %>%
ggplot(aes(x = damid, y = expr)) +
geom_point(aes(col = sign, alpha = sign)) +
geom_vline(xintercept = 0, linetype = "dashed", col = "blue") +
geom_hline(yintercept = 0, linetype = "dashed", col = "blue") +
geom_smooth(method = "lm", col = "red") +
facet_grid(LAD ~ key_damid) +
scale_color_manual(values = c("blue", "black", "red")) +
scale_alpha_manual(values = c(0.5, 0.05, 0.5)) +
theme_bw() +
theme(aspect.ratio = 1)
plot(plt)
# Plot LAD & latest time point
tib_filter <- tib_gather %>%
filter(LAD == T, key_damid == tail(tib_gather$key_damid)[1]) %>%
drop_na()
plt <- tib_filter %>%
ggplot(aes(x = damid, y = expr)) +
geom_point(aes(col = sign, size = sign)) +
geom_vline(xintercept = 0, linetype = "dashed", col = "black") +
geom_hline(yintercept = 0, linetype = "dashed", col = "black") +
scale_color_manual(values = c("blue", "black", "red"),
labels = c(paste0("Down (n=", sum(tib_filter$sign == "down"), ")"),
paste0("Stable (n=", sum(tib_filter$sign == "stable"), ")"),
paste0("Up (n=", sum(tib_filter$sign == "up"), ")"))) +
scale_size_manual(values = c(1, 0.25, 1), guide = F) +
facet_grid(. ~ key_damid) +
ggtitle(paste0("Pearson=",
round(cor(tib_filter$expr, tib_filter$damid), 3),
"; p-value=",
signif(cor.test(tib_filter$expr, tib_filter$damid)$p.value, 2))) +
xlab("Delta DamID") +
ylab("Delta Expr") +
coord_cartesian(xlim = c(-2, 2), ylim = c(-3, 6)) +
theme_bw() +
theme(aspect.ratio = 1)
plot(plt)
# Plot density
plt <- tib_gather %>%
ggplot(aes(x = damid)) +
geom_density(aes(col = sign)) +
geom_vline(xintercept = 0, linetype = "dashed", col = "blue") +
geom_hline(yintercept = 0, linetype = "dashed", col = "blue") +
facet_grid(LAD ~ key_damid) +
scale_color_manual(values = c("blue", "black", "red")) +
theme_bw() +
theme(aspect.ratio = 1)
plot(plt)
plt <- tib_gather %>%
ggplot(aes(x = damid)) +
stat_ecdf(aes(col = sign)) +
geom_vline(xintercept = 0, linetype = "dashed", col = "blue") +
geom_hline(yintercept = 0, linetype = "dashed", col = "blue") +
facet_grid(LAD ~ key_damid) +
scale_color_manual(values = c("blue", "black", "red")) +
theme_bw() +
theme(aspect.ratio = 1)
plot(plt)
# Plot without stable group
plt <- tib_gather %>%
filter(sign != "stable") %>%
ggplot(aes(x = damid, y = expr)) +
geom_point(aes(col = sign, alpha = sign)) +
geom_vline(xintercept = 0, linetype = "dashed", col = "blue") +
geom_hline(yintercept = 0, linetype = "dashed", col = "blue") +
#geom_smooth(method = "lm", col = "red") +
facet_grid(LAD ~ key_damid) +
scale_color_manual(values = c("blue", "red")) +
scale_alpha_manual(values = c(0.5, 0.5)) +
theme_bw() +
theme(aspect.ratio = 1)
plot(plt)
# Plot only changing NL genes
plt <- tib_gather %>%
filter(damid < -0.5 | damid > 0.5) %>%
ggplot(aes(x = damid, y = expr)) +
geom_point(size = 0.5) +
geom_vline(xintercept = 0, linetype = "dashed", col = "blue") +
geom_hline(yintercept = 0, linetype = "dashed", col = "blue") +
#geom_smooth(method = "lm", col = "red") +
facet_grid(. ~ key_damid) +
theme_bw() +
theme(aspect.ratio = 1)
plot(plt)
# Plot density
plt <- tib_gather %>%
mutate(damid_group = case_when(damid < -0.5 ~ "NL_lower",
damid > 0.5 ~ "NL_higher",
T ~ "NL_stable"),
damid_group = factor(damid_group,
levels = c("NL_lower", "NL_stable", "NL_higher"))) %>%
ggplot(aes(x = expr)) +
geom_density(alpha = 0.2, aes(fill = damid_group)) +
geom_vline(xintercept = 0, linetype = "dashed", col = "blue") +
geom_hline(yintercept = 0, linetype = "dashed", col = "blue") +
facet_grid(. ~ key_damid) +
scale_fill_manual(values = c("blue", "black", "red")) +
coord_cartesian(xlim = c(-0.5, 0.5), ylim = c(0, 30)) +
theme_bw() +
theme(aspect.ratio = 1)
plot(plt)
# Print correlation between NL interactions and expression
tib_corr <- tib_gather %>%
drop_na() %>%
filter(LAD == T) %>%
group_by(key_damid) %>%
summarise(cor = cor(expr, damid, method = "pearson"),
pvalue = cor.test(expr, damid, method = "pearson")$p.value,
n = n()) %>%
ungroup() %>%
mutate(sign = pvalue < 0.01)
plt <- tib_corr %>%
ggplot(aes(x = key_damid, y = cor, fill = sign)) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_bar(stat = "identity") +
xlab("") +
ylab("Pearson correlation") +
scale_fill_grey() +
theme_bw() +
theme(aspect.ratio = 2,
axis.text.x = element_text(angle = 90, hjust = 1))
plot(plt)
tib_corr
# # Also, determine ratio up/down in iLADs and in LADs
# tib_ratio <- tib_gather %>%
# group_by(key_damid, LAD) %>%
# summarise(down = mean(sign == "down"),
# up = mean(sign == "up")) %>%
# ungroup() %>%
# gather(key_sign, fraction, down, up)
#
# plt <- tib_ratio %>%
# ggplot(aes(x = LAD, y = fraction, fill = key_sign)) +
# geom_bar(stat = "identity", position = "dodge") +
# #facet_wrap( ~ key_damid, scales = "free_y") +
# facet_grid(. ~ key_damid) +
# scale_fill_grey() +
# theme_classic() +
# theme(aspect.ratio = 3)
# plot(plt)
#
# # Repeat, the other way around
# tib_ratio <- tib_gather %>%
# group_by(key_damid, sign) %>%
# summarise(mean = mean(LAD)) %>%
# ungroup()
#
# plt <- tib_ratio %>%
# ggplot(aes(x = key_damid, y = mean, fill = sign)) +
# geom_bar(stat = "identity", position = "dodge") +
# xlab("") +
# ylab("Fraction in LAD") +
# scale_fill_manual(values = c("blue", "darkgrey", "red")) +
# theme_classic() +
# theme(aspect.ratio = 1)
# plot(plt)
}
PlotExpressionVsDamID(genes, genes.res, genes.fpkm, genes.damid,
base = "CTCFEL_0h",
samples = c("CTCFEL_6h", "CTCFEL_24h", "CTCFEL_96h"))## # A tibble: 3 × 5
## key_damid cor pvalue n sign
## <fct> <dbl> <dbl> <int> <lgl>
## 1 CTCFEL_6h -0.0298 3.37e- 2 5064 FALSE
## 2 CTCFEL_24h -0.00792 5.73e- 1 5056 FALSE
## 3 CTCFEL_96h -0.143 1.09e-24 5064 TRUE
PlotExpressionVsDamID(genes, genes.res, genes.fpkm, genes.damid,
base = "WAPL_0h",
samples = c("WAPL_6h", "WAPL_24h", "WAPL_96h"))## # A tibble: 3 × 5
## key_damid cor pvalue n sign
## <fct> <dbl> <dbl> <int> <lgl>
## 1 WAPL_6h -0.00413 0.769 5075 FALSE
## 2 WAPL_24h 0.0381 0.00660 5088 TRUE
## 3 WAPL_96h -0.0602 0.0000171 5087 TRUE
PlotExpressionVsDamID(genes, genes.res, genes.fpkm, genes.damid,
base = "CTCFWAPL_0h",
samples = c("CTCFWAPL_6h", "CTCFWAPL_24h", "CTCFWAPL_96h"))## # A tibble: 3 × 5
## key_damid cor pvalue n sign
## <fct> <dbl> <dbl> <int> <lgl>
## 1 CTCFWAPL_6h 0.000627 9.64e- 1 5078 FALSE
## 2 CTCFWAPL_24h 0.0406 3.85e- 3 5074 TRUE
## 3 CTCFWAPL_96h -0.120 7.46e-18 5083 TRUE
PlotExpressionVsDamID(genes, genes.res, genes.fpkm, genes.damid,
base = "RAD21_0h",
samples = c("RAD21_6h", "RAD21_24h"))## # A tibble: 2 × 5
## key_damid cor pvalue n sign
## <fct> <dbl> <dbl> <int> <lgl>
## 1 RAD21_6h -0.0113 4.21e- 1 5050 FALSE
## 2 RAD21_24h -0.0990 1.68e-12 5056 TRUE
# Filtered for expr > 2.5 (as previous document)
tib_corr_ctcf <- PlotExpressionVsDamID(genes, genes.res, genes.fpkm, genes.damid,
base = "CTCFEL_0h",
samples = c("CTCFEL_6h", "CTCFEL_24h", "CTCFEL_96h"),
min_expr = 2.5)tib_corr_wapl <- PlotExpressionVsDamID(genes, genes.res, genes.fpkm, genes.damid,
base = "WAPL_0h",
samples = c("WAPL_6h", "WAPL_24h", "WAPL_96h"),
min_expr = 2.5)tib_corr_ctcfwapl <- PlotExpressionVsDamID(genes, genes.res, genes.fpkm, genes.damid,
base = "CTCFWAPL_0h",
samples = c("CTCFWAPL_6h", "CTCFWAPL_24h", "CTCFWAPL_96h"),
min_expr = 2.5)tib_corr_rad21 <- PlotExpressionVsDamID(genes, genes.res, genes.fpkm, genes.damid,
base = "RAD21_0h",
samples = c("RAD21_6h", "RAD21_24h"),
min_expr = 2.5)# Combine correlations in one final plot
tib_corr <- bind_rows(tib_corr_ctcf,
tib_corr_wapl,
tib_corr_ctcfwapl,
tib_corr_rad21) %>%
mutate(padj = p.adjust(pvalue),
sign = padj < 0.05) %>%
# filter for "significant" tests only
filter(key_damid %in% str_remove(significant_tests, "_vs.*")) %>%
separate(key_damid, c("target", "timepoint"), remove = F) %>%
mutate(target = str_remove(target, "EL"),
target = factor(target, c("CTCF", "RAD21", "WAPL", "CTCFWAPL")),
timepoint = factor(timepoint, c("24h", "48h", "96h"))) %>%
arrange(target, timepoint) %>%
mutate(key_damid = factor(key_damid, unique(key_damid)))
# Plot
tib_corr %>%
ggplot(aes(x = key_damid, y = cor, fill = sign)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = c("grey80", "grey30")) +
xlab("") +
ylab("Pearson correlation") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))Many plots. As expected, there is correlation between changes in expression and changes in nuclear positioning. This is normal.
Plot the DamID changes for differentially expressed genes. This is another way to show the same thing: differentially expressed genes also changes their lamina positioning.
# 1) Determine DamID changes with t=0h
tib_damid_changes <- as_tibble(mcols(genes.damid)) %>%
mutate(CTCFEL_96h = CTCFEL_96h - CTCFEL_0h,
RAD21_24h = RAD21_24h - RAD21_0h,
WAPL_96h = WAPL_96h - WAPL_0h,
CTCFWAPL_24h = CTCFWAPL_24h - CTCFWAPL_0h,
CTCFWAPL_96h = CTCFWAPL_96h - CTCFWAPL_0h,
gene = 1:nrow(.)) %>%
dplyr::select(CTCFEL_96h, RAD21_24h, WAPL_96h,
CTCFWAPL_24h, CTCFWAPL_96h, gene) %>%
gather(key, damid, -gene)
# 2) Get significant changes
tib_genes_sign <- as_tibble(genes.res) %>%
dplyr::select(matches(paste(significant_tests, collapse = "|"))) %>%
dplyr::select(contains("sign")) %>%
dplyr::select(-contains("48h")) %>%
mutate(gene = 1:nrow(.)) %>%
gather(key, gene_sign, -gene) %>%
mutate(key = str_remove(key, "_vs.*"))
# 3) Join the two and filter for selected genes (expression > cutoff)
tib_combined <- full_join(tib_genes_sign, tib_damid_changes) %>%
#filter(gene %in% which(genes_expression_idx)) %>%
separate(key, c("target", "timepoint"), remove = F) %>%
mutate(target = str_remove(target, "EL"),
target = factor(target, c("CTCF", "RAD21", "WAPL", "CTCFWAPL")),
timepoint = factor(timepoint, c("24h", "48h", "96h"))) %>%
arrange(gene, target, timepoint) %>%
mutate(key = factor(key, unique(key)))
# LAD genes only?
lad_idx <- which(overlapsAny(genes.damid, gr_LAD_consensus,
ignore.strand = T))
#lad_idx <- which(rowSums(as_tibble(mcols(genes.damid)) > 0) > 0)
tib_combined <- tib_combined %>%
filter(gene %in% lad_idx)
# 4) Plot
tib_combined %>%
ggplot(aes(x = target, y = damid, fill = gene_sign)) +
geom_boxplot(outlier.shape = NA, position = "dodge") +
geom_hline(yintercept = 0, col = "grey30", linetype = "dashed") +
facet_grid(. ~ timepoint, scales = "free_x", space = "free") +
xlab("") +
ylab("pA-DamID difference with t=0h") +
scale_fill_manual(values = c("blue", "grey50", "red"),
name = "Class") +
coord_cartesian(ylim = c(-1.3, 1.3)) +
theme_bw() +
theme(#aspect.ratio = 2/3,
axis.text.x = element_text(angle = 90, hjust = 1))# 5) Calculate statistics for illustrator (lazy approach)
tib_pvalues <- tibble()
for (tmp_key in unique(tib_combined$key)) {
tmp <- tib_combined %>%
filter(key == tmp_key)
for (tmp_sign in c("up", "down")) {
test <- wilcox.test(tmp$damid[tmp$gene_sign == "stable"],
tmp$damid[tmp$gene_sign == tmp_sign],
conf.int = TRUE)
tib_pvalues <- bind_rows(tib_pvalues,
tibble(key = tmp_key,
gene_sign = tmp_sign,
n_sign = sum(tmp$gene_sign == tmp_sign),
pvalue = test$p.value,
direction = ifelse(test$estimate > 0,
"up", "down")))
}
}
# Multiple testing
tib_pvalues %>%
mutate(padj = p.adjust(pvalue),
sign = padj < 0.05) %>%
print(n = 40)## # A tibble: 10 × 7
## key gene_sign n_sign pvalue direction padj sign
## <chr> <chr> <int> <dbl> <chr> <dbl> <lgl>
## 1 CTCFEL_96h up 405 1.64e-23 up 1.48e-22 TRUE
## 2 CTCFEL_96h down 73 5.17e- 6 down 3.10e- 5 TRUE
## 3 RAD21_24h up 183 4.58e- 7 up 3.67e- 6 TRUE
## 4 RAD21_24h down 23 8.82e- 2 down 3.53e- 1 FALSE
## 5 WAPL_96h up 514 7.53e- 7 up 5.27e- 6 TRUE
## 6 WAPL_96h down 91 6.34e- 1 down 6.35e- 1 FALSE
## 7 CTCFWAPL_24h up 29 3.17e- 1 down 6.35e- 1 FALSE
## 8 CTCFWAPL_24h down 21 2.79e- 3 down 1.39e- 2 TRUE
## 9 CTCFWAPL_96h up 841 2.22e-28 up 2.22e-27 TRUE
## 10 CTCFWAPL_96h down 406 1.90e- 1 down 5.70e- 1 FALSE
Yes. As I said. Do the same thing for nascent expression changes.
# 1) Determine DamID changes with t=0h
tib_damid_changes <- as_tibble(mcols(genes.damid)) %>%
mutate(WAPL_6h = WAPL_6h - WAPL_0h,
WAPL_24h = WAPL_24h - WAPL_0h,
gene = 1:nrow(.)) %>%
dplyr::select(WAPL_6h, WAPL_24h, gene) %>%
gather(key, damid, -gene)
# 2) Get significant changes
tib_genes_sign <- as_tibble(genes.res.nascent) %>%
dplyr::select(contains("sign")) %>%
mutate(gene = 1:nrow(.)) %>%
gather(key, gene_sign, -gene) %>%
mutate(key = str_remove(key, "_vs.*"))
# 3) Join the two and filter for selected genes (expression > cutoff)
tib_combined <- full_join(tib_genes_sign, tib_damid_changes) %>%
#filter(gene %in% which(genes_expression_idx)) %>%
separate(key, c("target", "timepoint"), remove = F) %>%
mutate(target = factor(target, "WAPL"),
timepoint = factor(timepoint, c("6h" ,"24h"))) %>%
arrange(gene, target, timepoint) %>%
mutate(key = factor(key, unique(key)))
# LAD genes only?
tib_combined <- tib_combined %>%
filter(gene %in% lad_idx)
# 4) Plot
tib_combined %>%
ggplot(aes(x = key, y = damid, fill = gene_sign)) +
geom_boxplot(outlier.shape = NA, position = "dodge") +
geom_hline(yintercept = 0, col = "grey30", linetype = "dashed") +
xlab("") +
ylab("pA-DamID difference with t=0h") +
scale_fill_manual(values = c("blue", "grey50", "red"),
name = "Class") +
coord_cartesian(ylim = c(-1, 1)) +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))# 5) Calculate statistics for illustrator (lazy approach)
tib_pvalues <- tibble()
for (tmp_key in unique(tib_combined$key)) {
tmp <- tib_combined %>%
filter(key == tmp_key)
for (tmp_sign in c("up", "down")) {
test <- wilcox.test(tmp$damid[tmp$gene_sign == "stable"],
tmp$damid[tmp$gene_sign == tmp_sign],
conf.int = TRUE)
tib_pvalues <- bind_rows(tib_pvalues,
tibble(key = tmp_key,
gene_sign = tmp_sign,
n_sign = sum(tmp$gene_sign == tmp_sign),
pvalue = test$p.value,
direction = ifelse(test$estimate > 0,
"up", "down")))
}
}
# Multiple testing
tib_pvalues %>%
mutate(padj = p.adjust(pvalue),
sign = padj < 0.05) %>%
print(n = 40)## # A tibble: 4 × 7
## key gene_sign n_sign pvalue direction padj sign
## <chr> <chr> <int> <dbl> <chr> <dbl> <lgl>
## 1 WAPL_6h up 11 0.720 up 1 FALSE
## 2 WAPL_6h down 9 0.767 down 1 FALSE
## 3 WAPL_24h up 92 0.476 down 1 FALSE
## 4 WAPL_24h down 30 0.347 down 1 FALSE
Here, the picture is less clear. It seems that other effects (i.e. CTCF detachment) are stronger than the transcription effect on lamina interactions.
Skipped.
I want to show that differential genes are not causing the LAD changes I observed previously. This is the main thing that I want to convey. But how to show this?
To do this, I will determine the change in LAD score for LADs with up and downregulated genes. If the LAD score is not correlated with the presence of transcriptional differences, I can conclude that transcription is not causing the change in nuclear positioning.
# Get the results
significant_tests_filtered <- grep("48h", significant_tests,
invert = T, value = T)
tib_results <- as_tibble(mcols(genes.res)) %>%
dplyr::select(contains("sign")) %>%
dplyr::select(matches(paste(significant_tests_filtered, collapse = "|"))) %>%
dplyr::rename_all(function(x) str_remove(x, "_vs.*"))
# Count number of genes per differential LAD
tib <- as_tibble(mcols(gr_LAD_consensus)) %>%
mutate(gene_total = countOverlaps(gr_LAD_consensus, genes.res,
ignore.strand = T))
tib_combined <- tibble()
for (t in names(tib_results)) {
# Determine the number of up and downregulated genes - and a "LAD summary"
tib <- tib %>%
bind_cols(tibble(up = countOverlaps(gr_LAD_consensus,
genes.res[tib_results %>% pull(t) == "up"],
ignore.strand = T),
down = countOverlaps(gr_LAD_consensus,
genes.res[tib_results %>% pull(t) == "down"],
ignore.strand = T)) %>%
mutate(class = case_when((up > 0) & (down > 0) ~ "both",
up > down ~ "up",
down > up ~ "down",
#(up > 0) & (down > 0) ~ "both",
T ~ "stable"),
class = factor(class, c("down", "stable", "up", "both")),
diff = tib %>% pull(t) -
tib %>% pull(str_replace(t, "_.*", "_0h"))) %>%
dplyr::rename_all(function(x) paste(t, x, sep = "_")))
# Plot immediately
plt <- tib %>%
dplyr::rename_all(function(x) str_remove(x, paste0(t, "_"))) %>%
ggplot(aes(x = class, y = diff, fill = class)) +
geom_boxplot(outlier.shape = NA) +
ggtitle(t) +
theme_bw() +
theme(aspect.ratio = 1)
plot(plt)
# Add to tib combined with all the details
tib_combined <- bind_rows(tib_combined,
tib %>%
dplyr::rename_all(function(x) str_remove(x, paste0(t, "_"))) %>%
dplyr::select(class, diff, up, down) %>%
mutate(test = t))
}# Remove LADs with up and downregulated genes
tib_combined <- tib_combined %>%
filter(class != "both")
# Plot combined
tib_combined %>%
separate(test, c("condition", "timepoint"), remove = F) %>%
mutate(condition = factor(condition, levels(metadata$condition)),
timepoint = factor(timepoint, levels(metadata$timepoint))) %>%
arrange(condition, timepoint) %>%
ggplot(aes(x = condition, y = diff, fill = class)) +
geom_boxplot(outlier.shape = NA, position = "dodge") +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
facet_grid(. ~ timepoint, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(-1.4, 1.4)) +
xlab("") +
ylab("LAD difference") +
scale_fill_manual(values = c("blue", "grey50", "red"),
name = "Class") +
theme_bw() +
theme(# aspect.ratio = 2/3,
axis.text.x = element_text(angle = 90, hjust = 1))# Calculate statistics for illustrator (lazy approach)
tib_pvalues <- tibble()
for (tmp_key in unique(tib_combined$test)) {
tmp <- tib_combined %>%
filter(test == tmp_key)
for (tmp_sign in c("up", "down")) {
test <- wilcox.test(tmp$diff[tmp$class == "stable"],
tmp$diff[tmp$class == tmp_sign],
conf.int = TRUE)
tib_pvalues <- bind_rows(tib_pvalues,
tibble(key = tmp_key,
class = tmp_sign,
n_sign = sum(tmp$class == tmp_sign),
pvalue = test$p.value,
direction = ifelse(test$estimate > 0,
"up", "down")))
}
}
# Multiple testing
tib_pvalues %>%
mutate(padj = p.adjust(pvalue),
sign = padj < 0.05) %>%
print(n = 40)## # A tibble: 10 × 7
## key class n_sign pvalue direction padj sign
## <chr> <chr> <int> <dbl> <chr> <dbl> <lgl>
## 1 CTCFEL_96h up 238 7.62e- 1 up 1 e+ 0 FALSE
## 2 CTCFEL_96h down 47 1.88e- 5 down 1.69e- 4 TRUE
## 3 WAPL_96h up 273 2.82e- 3 up 1.97e- 2 TRUE
## 4 WAPL_96h down 43 1.15e- 2 up 6.89e- 2 FALSE
## 5 CTCFWAPL_24h up 28 1.48e- 1 up 7.42e- 1 FALSE
## 6 CTCFWAPL_24h down 20 9.66e- 1 up 1 e+ 0 FALSE
## 7 CTCFWAPL_96h up 317 1.07e-17 up 1.07e-16 TRUE
## 8 CTCFWAPL_96h down 100 4.29e- 1 down 1 e+ 0 FALSE
## 9 RAD21_24h up 153 1.60e- 3 up 1.28e- 2 TRUE
## 10 RAD21_24h down 19 8.33e- 1 down 1 e+ 0 FALSE
Good. As hoped, most LAD changes are not correlated with the presence of differentially expressed genes.
Repeat for nascent transcription.
# Get the results
tib_results <- as_tibble(mcols(genes.res.nascent)) %>%
dplyr::select(contains("sign")) %>%
dplyr::rename_all(function(x) str_remove(x, "_vs.*"))
# Count number of genes per differential LAD
tib <- as_tibble(mcols(gr_LAD_consensus)) %>%
mutate(gene_total = countOverlaps(gr_LAD_consensus, genes.res.nascent,
ignore.strand = T))
tib_combined <- tibble()
for (t in names(tib_results)) {
# Determine the number of up and downregulated genes - and a "LAD summary"
tib <- tib %>%
bind_cols(tibble(up = countOverlaps(gr_LAD_consensus,
genes.res.nascent[tib_results %>% pull(t) == "up"],
ignore.strand = T),
down = countOverlaps(gr_LAD_consensus,
genes.res.nascent[tib_results %>% pull(t) == "down"],
ignore.strand = T)) %>%
mutate(class = case_when((up > 0) & (down > 0) ~ "both",
up > down ~ "up",
down > up ~ "down",
#(up > 0) & (down > 0) ~ "both",
T ~ "stable"),
class = factor(class, c("down", "stable", "up", "both")),
diff = tib %>% pull(t) -
tib %>% pull(str_replace(t, "_.*", "_0h"))) %>%
dplyr::rename_all(function(x) paste(t, x, sep = "_")))
# Plot immediately
plt <- tib %>%
dplyr::rename_all(function(x) str_remove(x, paste0(t, "_"))) %>%
ggplot(aes(x = class, y = diff, fill = class)) +
geom_boxplot(outlier.shape = NA) +
ggtitle(t) +
theme_bw() +
theme(aspect.ratio = 1)
plot(plt)
# Add to tib combined with all the details
tib_combined <- bind_rows(tib_combined,
tib %>%
dplyr::rename_all(function(x) str_remove(x, paste0(t, "_"))) %>%
dplyr::select(class, diff, up, down) %>%
mutate(test = t))
}# Remove LADs with up and downregulated genes
tib_combined <- tib_combined %>%
filter(class != "both")
# Plot combined
tib_combined %>%
filter(class != "both") %>%
separate(test, c("condition", "timepoint"), remove = F) %>%
mutate(condition = factor(condition, levels(metadata$condition)),
timepoint = factor(timepoint, levels(metadata$timepoint))) %>%
arrange(condition, timepoint) %>%
ggplot(aes(x = timepoint, y = diff, fill = class)) +
geom_boxplot(outlier.shape = NA, position = "dodge") +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
#facet_grid(. ~ timepoint, scales = "free_x", space = "free") +
coord_cartesian(ylim = c(-0.8, 0.8)) +
xlab("") +
ylab("LAD difference") +
scale_fill_manual(values = c("blue", "grey50", "red"),
name = "Class") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))# Calculate statistics for illustrator (lazy approach)
tib_pvalues <- tibble()
for (tmp_key in unique(tib_combined$test)) {
tmp <- tib_combined %>%
filter(test == tmp_key)
for (tmp_sign in c("up", "down")) {
test <- wilcox.test(tmp$diff[tmp$class == "stable"],
tmp$diff[tmp$class == tmp_sign],
conf.int = TRUE)
tib_pvalues <- bind_rows(tib_pvalues,
tibble(key = tmp_key,
class = tmp_sign,
n_sign = sum(tmp$class == tmp_sign),
pvalue = test$p.value,
direction = ifelse(test$estimate > 0,
"up", "down")))
}
}
# Multiple testing
tib_pvalues %>%
mutate(padj = p.adjust(pvalue),
sign = padj < 0.05) %>%
print(n = 40)## # A tibble: 4 × 7
## key class n_sign pvalue direction padj sign
## <chr> <chr> <int> <dbl> <chr> <dbl> <lgl>
## 1 WAPL_6h up 11 0.134 up 0.425 FALSE
## 2 WAPL_6h down 9 0.106 up 0.425 FALSE
## 3 WAPL_24h up 73 0.129 up 0.425 FALSE
## 4 WAPL_24h down 24 0.607 up 0.607 FALSE
These results show that differential genes are mostly not correlated with LAD changes. This is roughly the same message as written in the manuscript right now, but is slightly more accurate (and easy to interpret) in my opinion. Also, it’s easier to include this result in the story.
saveRDS(genes.damid,
file.path(output.dir, "genes_damid.rds"))Good. Conclusions:
sessionInfo()## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] knitr_1.33 GGally_2.1.2 yaml_2.2.1
## [4] caTools_1.18.2 rtracklayer_1.50.0 GenomicRanges_1.42.0
## [7] GenomeInfoDb_1.26.7 IRanges_2.24.1 S4Vectors_0.28.1
## [10] BiocGenerics_0.36.1 forcats_0.5.1 stringr_1.4.0
## [13] dplyr_1.0.7 purrr_0.3.4 readr_2.0.0
## [16] tidyr_1.1.3 tibble_3.1.6 ggplot2_3.3.5
## [19] tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 matrixStats_0.60.0
## [3] fs_1.5.0 lubridate_1.7.10
## [5] RColorBrewer_1.1-2 httr_1.4.2
## [7] tools_4.0.5 backports_1.2.1
## [9] bslib_0.2.5.1 utf8_1.2.2
## [11] R6_2.5.1 DBI_1.1.1
## [13] colorspace_2.0-2 withr_2.4.2
## [15] tidyselect_1.1.1 compiler_4.0.5
## [17] cli_3.1.0 rvest_1.0.1
## [19] Biobase_2.50.0 xml2_1.3.2
## [21] DelayedArray_0.16.3 labeling_0.4.2
## [23] sass_0.4.0 scales_1.1.1
## [25] digest_0.6.28 Rsamtools_2.6.0
## [27] rmarkdown_2.11 XVector_0.30.0
## [29] pkgconfig_2.0.3 htmltools_0.5.1.1
## [31] MatrixGenerics_1.2.1 highr_0.9
## [33] dbplyr_2.1.1 rlang_0.4.12
## [35] readxl_1.3.1 rstudioapi_0.13
## [37] farver_2.1.0 jquerylib_0.1.4
## [39] generics_0.1.0 jsonlite_1.7.2
## [41] BiocParallel_1.24.1 RCurl_1.98-1.3
## [43] magrittr_2.0.1 GenomeInfoDbData_1.2.4
## [45] Matrix_1.3-4 Rcpp_1.0.7
## [47] munsell_0.5.0 fansi_0.5.0
## [49] lifecycle_1.0.1 stringi_1.7.3
## [51] SummarizedExperiment_1.20.0 zlibbioc_1.36.0
## [53] plyr_1.8.6 grid_4.0.5
## [55] crayon_1.4.2 lattice_0.20-44
## [57] Biostrings_2.58.0 haven_2.4.1
## [59] hms_1.1.0 pillar_1.6.4
## [61] reprex_2.0.0 XML_3.99-0.6
## [63] glue_1.5.0 evaluate_0.14
## [65] modelr_0.1.8 vctrs_0.3.8
## [67] tzdb_0.1.2 cellranger_1.1.0
## [69] gtable_0.3.0 reshape_0.8.8
## [71] assertthat_0.2.1 xfun_0.24
## [73] broom_0.7.9 GenomicAlignments_1.26.0
## [75] ellipsis_0.3.2